In [2]:
    
from __future__ import division
import random
    
In [4]:
    
def split_data(data, prob):
    """Randomly partition `data` into two lists with fractions [prob, 1 - prob].

    Each row is independently assigned to the first list with probability
    `prob`, otherwise to the second.  Uses the global `random` state, so
    seed beforehand for reproducibility.
    """
    first, second = [], []
    for item in data:
        # one random draw per row decides which side it lands on
        bucket = first if random.random() < prob else second
        bucket.append(item)
    return first, second
def train_test_split(x, y, test_pct):
    """Randomly split parallel sequences `x` and `y` into train/test portions.

    Args:
        x: feature values (sequence).
        y: corresponding labels, same length as `x`.
        test_pct: fraction (0-1) of pairs to put in the test set, in expectation.

    Returns:
        (x_train, x_test, y_train, y_test) as tuples.  Pairing between
        x[i] and y[i] is preserved.  Uses the global `random` state.
    """
    # materialize: in Python 3, zip() is a one-shot iterator
    data = list(zip(x, y))
    train, test = split_data(data, 1 - test_pct)

    # unzip trick; guard the empty case, where unpacking zip(*[])
    # would raise ValueError
    x_train, y_train = zip(*train) if train else ((), ())
    x_test, y_test = zip(*test) if test else ((), ())

    return x_train, x_test, y_train, y_test
    
In [8]:
    
def accuracy(tp, fp, fn, tn):
    """Fraction of all predictions that were correct.

    tp/fp/fn/tn are the confusion-matrix counts: true positives,
    false positives, false negatives, true negatives.
    """
    return (tp + tn) / (tp + fp + fn + tn)
def precision(tp, fp, fn, tn):
    """Of everything predicted positive, the fraction that truly was positive."""
    predicted_positive = tp + fp
    return tp / predicted_positive
def recall(tp, fp, fn, tn):
    """Of everything actually positive, the fraction we predicted positive."""
    actual_positive = tp + fn
    return tp / actual_positive
def f1_scoare(tp, fp, fn, tn):
    """Harmonic mean of precision and recall (the F1 score).

    NOTE: the function name keeps the original misspelling ("scoare") so
    existing callers keep working; a correctly spelled alias `f1_score`
    is defined below.

    Returns 0 when precision and recall are both 0 (tp == 0), instead of
    raising ZeroDivisionError.  Still raises ZeroDivisionError if
    precision/recall themselves are undefined (tp + fp == 0 or
    tp + fn == 0), matching those helpers.
    """
    p = precision(tp, fp, fn, tn)
    r = recall(tp, fp, fn, tn)

    # harmonic mean is undefined at p + r == 0; define it as 0 by convention
    if p + r == 0:
        return 0
    return 2 * p * r / (p + r)

f1_score = f1_scoare  # backward-compatible, correctly spelled alias
    
In [9]:
    
# bias is poor performance even on the training data
# variance is poor performance across many different training sets
# high bias can be reduced by adding features
# high variance can be reduced by removing features or getting more data
    
In [ ]: